demo sn10x, Colon

(only 1/2 datasize to standard level)

Baf53cre ENS neurons,
CR infection 7d, CTL x4 CKO(Ahr-cko) x4

loading 60k cells for all,
cellranger called 24,758 cells

load dependancies

load 10x data

filt.10x <- Read10X(data.dir = "../output_demo/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`

check datasets

dim(GEX)
## [1] 32285 24758
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCAAGAATTCAG-1 AAACCCAAGCGTATGG-1 AAACCCAAGGCAGGTT-1
## Xkr4                     1                  3                  2
## Gm1992                   .                  .                  1
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
##         AAACCCAAGGTTGGTG-1 AAACCCACAAATCGTC-1 AAACCCACAAGGGCAT-1
## Xkr4                     1                  3                  1
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
dim(FB)
## [1]     8 24758
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
##       AAACCCAAGAATTCAG-1 AAACCCAAGCGTATGG-1 AAACCCAAGGCAGGTT-1
## CTL.1                118                 98                 92
## CTL.2                 22                 13                 19
## CTL.3                 24                 92                 26
## CTL.4                106                 69                 34
## CKO.1                 49                 41                 36
## CKO.2                359                 53                 44
## CKO.3                 76                364                198
## CKO.4                220                735                164
##       AAACCCAAGGTTGGTG-1 AAACCCACAAATCGTC-1 AAACCCACAAGGGCAT-1
## CTL.1                 96                112                 96
## CTL.2                 16                 25                 21
## CTL.3                 17                 15                 25
## CTL.4                 48                 31                 23
## CKO.1                 38                 50                 54
## CKO.2                380                 64                 38
## CKO.3                 71                232                289
## CKO.4                167                206                192
rowSums(FB)
##   CTL.1   CTL.2   CTL.3   CTL.4   CKO.1   CKO.2   CKO.3   CKO.4 
## 5238102  901560 1048906 1403495 1923599 3230527 3292731 7463190
rowSums(FB>0)
## CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4 
## 24757 24706 24738 24746 24736 24755 24758 24758

FB

# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "CR7d_LI")

FB.seur
## An object of class Seurat 
## 8 features across 24758 samples within 1 assay 
## Active assay: RNA (8 features, 0 variable features)
rownames(FB.seur)
## [1] "CTL.1" "CTL.2" "CTL.3" "CTL.4" "CKO.1" "CKO.2" "CKO.3" "CKO.4"

normalization

perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html

# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data  
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features

check demux

first define FB colors based on conditions

color.FB <- ggsci::pal_d3("category20c")(20)[c(1,6,11,16,
                                               2,7,12,17
                                              
                                               )]


level.FB <- c(rownames(FB.seur))

color.FBraw <-  c("#FF6C91","lightgrey",color.FB)
scales::show_col(color.FBraw, ncol = 6)

scales::show_col(color.FB, ncol = 4)

par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])

plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])

range ‘q’

q0.50 ~ 0.99
table.demux
##    quantile Doublet CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4 Negative
## 1      0.50   23881    80   128   110    84    83    89    94    82      127
## 2      0.51   23871    80   129   110    86    85    90    97    82      128
## 3      0.52   23831    82   131   115    96    87    95   103    86      132
## 4      0.53   23817    83   134   120    97    88    95   105    87      132
## 5      0.54   23765    89   140   129    98    97   104   108    89      139
## 6      0.55   23718    99   148   125   100   102   107   112    97      150
## 7      0.56   23652   105   143   138   105   111   115   122   107      160
## 8      0.57   23595   115   152   149   110   112   119   127   112      167
## 9      0.58   23574   117   157   150   113   117   120   130   112      168
## 10     0.59   23503   118   169   150   124   129   128   135   123      179
## 11     0.60   23474   121   172   157   129   129   130   138   125      183
## 12     0.61   23410   128   181   169   132   133   140   143   136      186
## 13     0.62   23307   135   182   184   149   142   152   154   152      201
## 14     0.63   23262   137   188   180   159   149   158   157   157      211
## 15     0.64   23195   140   196   189   166   159   167   164   164      218
## 16     0.65   23144   144   207   198   175   158   174   165   169      224
## 17     0.66   23047   155   208   204   189   165   189   181   184      236
## 18     0.67   22934   169   224   219   199   177   208   192   195      241
## 19     0.68   22893   174   227   223   204   187   210   199   196      245
## 20     0.69   22797   182   249   237   213   194   223   209   201      253
## 21     0.70   22728   188   255   240   224   207   229   219   206      262
## 22     0.71   22568   205   258   267   249   221   249   241   220      280
## 23     0.72   22493   208   268   279   248   229   257   256   228      292
## 24     0.73   22386   222   280   289   265   236   268   270   237      305
## 25     0.74   22305   234   297   302   269   240   278   281   243      309
## 26     0.75   22140   259   303   306   291   265   295   303   271      325
## 27     0.76   22040   269   316   323   310   274   305   312   276      333
## 28     0.77   21891   277   344   348   317   281   330   330   293      347
## 29     0.78   21707   297   347   368   345   305   356   350   310      373
## 30     0.79   21545   315   376   396   351   318   375   376   318      388
## 31     0.80   21357   332   410   420   372   340   399   396   334      398
## 32     0.81   21195   345   411   440   398   363   414   417   361      414
## 33     0.82   20978   362   445   461   415   382   434   449   381      451
## 34     0.83   20848   375   469   485   435   389   444   455   392      466
## 35     0.84   20545   410   493   529   465   419   484   496   423      494
## 36     0.85   20370   431   523   552   486   432   503   512   435      514
## 37     0.86   20094   456   539   584   520   475   531   546   461      552
## 38     0.87   19883   477   576   606   533   497   550   574   485      577
## 39     0.88   19518   518   604   660   577   538   596   615   522      610
## 40     0.89   19272   544   643   691   599   568   622   643   540      636
## 41     0.90   18823   596   680   746   647   621   674   694   592      685
## 42     0.91   18441   638   705   782   695   668   715   752   624      738
## 43     0.92   18013   689   763   826   748   701   763   797   662      796
## 44     0.93   17535   744   807   884   800   747   803   862   719      857
## 45     0.94   16913   818   878   956   857   812   869   938   791      926
## 46     0.95   16161   885   944  1036   927   890   972  1022   894     1027
## 47     0.96   15334   971  1012  1133  1022   991  1053  1122   969     1151
## 48     0.97   14456  1059  1114  1231  1086  1085  1148  1212  1043     1324
## 49     0.98   13099  1193  1253  1403  1203  1206  1317  1349  1155     1580
## 50     0.99   11326  1336  1412  1597  1351  1353  1506  1545  1268     2064
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))


plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("CTL|CKO",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")

#plot(table.demux$quantile, pch=16, ylab="mod-quantile")
plot.new()

plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])

q0.9900 ~ 0.9999
#table.demux.s
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))


plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("CTL|CKO",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")

#plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
plot.new()

plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])
plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])

demultiplexing

# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for CTL.1 : 191 reads
## Cutoff for CTL.2 : 33 reads
## Cutoff for CTL.3 : 45 reads
## Cutoff for CTL.4 : 55 reads
## Cutoff for CKO.1 : 73 reads
## Cutoff for CKO.2 : 129 reads
## Cutoff for CKO.3 : 109 reads
## Cutoff for CKO.4 : 301 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    11326     2064    11368
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    11326     2064     1336     1412     1597     1351     1353     1506 
##    CKO.3    CKO.4 
##     1545     1268
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.992)
## Cutoff for CTL.1 : 199 reads
## Cutoff for CTL.2 : 34 reads
## Cutoff for CTL.3 : 46 reads
## Cutoff for CTL.4 : 57 reads
## Cutoff for CKO.1 : 76 reads
## Cutoff for CKO.2 : 135 reads
## Cutoff for CKO.3 : 113 reads
## Cutoff for CKO.4 : 312 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    10851     2207    11700
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    10851     2207     1375     1458     1640     1381     1391     1551 
##    CKO.3    CKO.4 
##     1590     1314
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.994)
## Cutoff for CTL.1 : 209 reads
## Cutoff for CTL.2 : 36 reads
## Cutoff for CTL.3 : 49 reads
## Cutoff for CTL.4 : 60 reads
## Cutoff for CKO.1 : 79 reads
## Cutoff for CKO.2 : 142 reads
## Cutoff for CKO.3 : 118 reads
## Cutoff for CKO.4 : 325 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    10217     2436    12105
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    10217     2436     1464     1506     1686     1422     1444     1597 
##    CKO.3    CKO.4 
##     1637     1349
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.996)
## Cutoff for CTL.1 : 223 reads
## Cutoff for CTL.2 : 38 reads
## Cutoff for CTL.3 : 52 reads
## Cutoff for CTL.4 : 63 reads
## Cutoff for CKO.1 : 85 reads
## Cutoff for CKO.2 : 153 reads
## Cutoff for CKO.3 : 125 reads
## Cutoff for CKO.4 : 345 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     9524     2755    12479
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##     9524     2755     1524     1549     1727     1481     1481     1650 
##    CKO.3    CKO.4 
##     1701     1366
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.998)
## Cutoff for CTL.1 : 247 reads
## Cutoff for CTL.2 : 42 reads
## Cutoff for CTL.3 : 58 reads
## Cutoff for CTL.4 : 69 reads
## Cutoff for CKO.1 : 94 reads
## Cutoff for CKO.2 : 171 reads
## Cutoff for CKO.3 : 137 reads
## Cutoff for CKO.4 : 377 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     8385     3303    13070
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##     8385     3303     1628     1661     1780     1524     1565     1730 
##    CKO.3    CKO.4 
##     1774     1408
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.999)
## Cutoff for CTL.1 : 271 reads
## Cutoff for CTL.2 : 46 reads
## Cutoff for CTL.3 : 63 reads
## Cutoff for CTL.4 : 76 reads
## Cutoff for CKO.1 : 103 reads
## Cutoff for CKO.2 : 190 reads
## Cutoff for CKO.3 : 149 reads
## Cutoff for CKO.4 : 409 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     7484     3927    13347
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##     7484     3927     1669     1663     1828     1521     1607     1799 
##    CKO.3    CKO.4 
##     1821     1439
# manual cutoffs based on default values
# 
# CTL.1  q0.998  247
# CTL.2  q0.998   42
# CTL.3  q0.998   58
# CTL.4  q0.998   69   
# CKO.1  q0.998   94
# CKO.2  q0.998  171
# CKO.3  q0.998  137
# CKO.4  q0.998  377
#


cutoff.FB <- c(247,42,58,69,
               94,171,137,377)
names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4 
##   247    42    58    69    94   171   137   377
par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])

plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])

par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])

plot(sort(t(FB)[,5], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])

## custom cutoff run this
custom.Demux <- function(FBobj,FBcutoff){
  # define decision matrix
  dd <- FBobj@assays[['HTO']]@counts
  dd <- apply(dd, 2, function(x){
    x > FBcutoff
  })
  x1 <- colSums(dd)
  x1[x1>1] <- "Doublet"
  x1[x1==0] <- "Negative"
  x1[x1==1] <- "Singlet"

  x2 <- x1
  
  bc.slt  <- names(x1)[x1=="Singlet"]
  
  for(bc in bc.slt){
    x2[bc] <- rownames(dd)[dd[,bc]]
  }
  
  FBdf <- data.frame(HTO_classification.global=factor(x1,
                                                      levels = c("Doublet","Negative","Singlet")),
                     hash.ID=factor(x2,
                                    levels = c("Doublet","Negative",rownames(dd))))
  return(FBdf)
}

df.FB <- custom.Demux(FB.seur,cutoff.FB)
## custom cutoff run this
dim(df.FB)
## [1] 24758     2
df.FB[1:10,]
##                    HTO_classification.global hash.ID
## AAACCCAAGAATTCAG-1                   Doublet Doublet
## AAACCCAAGCGTATGG-1                   Doublet Doublet
## AAACCCAAGGCAGGTT-1                   Singlet   CKO.3
## AAACCCAAGGTTGGTG-1                   Singlet   CKO.2
## AAACCCACAAATCGTC-1                   Singlet   CKO.3
## AAACCCACAAGGGCAT-1                   Singlet   CKO.3
## AAACCCACACCAGGTC-1                   Doublet Doublet
## AAACCCACACGGCACT-1                   Singlet   CKO.4
## AAACCCACAGCAGTCC-1                   Doublet Doublet
## AAACCCACAGCATTGT-1                   Doublet Doublet
## custom cutoff run this
table(df.FB)
##                          hash.ID
## HTO_classification.global Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
##                  Doublet     8385        0     0     0     0     0     0     0
##                  Negative       0     3303     0     0     0     0     0     0
##                  Singlet        0        0  1628  1660  1769  1524  1562  1714
##                          hash.ID
## HTO_classification.global CKO.3 CKO.4
##                  Doublet      0     0
##                  Negative     0     0
##                  Singlet   1795  1418
## custom cutoff run this
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
##                    orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGAATTCAG-1    CR7d_LI        974            8        974            8
## AAACCCAAGCGTATGG-1    CR7d_LI       1465            8       1465            8
## AAACCCAAGGCAGGTT-1    CR7d_LI        613            8        613            8
## AAACCCAAGGTTGGTG-1    CR7d_LI        833            8        833            8
##                    HTO_maxID HTO_secondID HTO_margin HTO_classification
## AAACCCAAGAATTCAG-1     CKO.2        CTL.4  0.6061048        CKO.2_CTL.4
## AAACCCAAGCGTATGG-1     CKO.3        CTL.3  0.1276493        CKO.3_CTL.3
## AAACCCAAGGCAGGTT-1     CKO.3        CTL.3  0.4757557              CKO.3
## AAACCCAAGGTTGGTG-1     CKO.2        CTL.4  1.1542604              CKO.2
##                    HTO_classification.global hash.ID
## AAACCCAAGAATTCAG-1                   Doublet Doublet
## AAACCCAAGCGTATGG-1                   Doublet Doublet
## AAACCCAAGGCAGGTT-1                   Singlet   CKO.3
## AAACCCAAGGTTGGTG-1                   Singlet   CKO.2
## default q0.99 run this
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
#                                            levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
#                          levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,23500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1680,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

RidgePlot

with ridge plots, raw
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))

RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=4,
          cols = color.FB) 

with ridge plots, filtered
FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=4,same.y.lims= TRUE,
          cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID.sort") 

tSNE for HTOs

# first, remove negative cells from th object
#FB.seur.subset <- FB.seur

# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"

#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:8, perplexity = 500, check_duplicates = FALSE)


#saveRDS(FB.seur.subset, "FB0618.seur.subset.rds")
FB.seur.subset <- readRDS("FB0618.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(levels(FB.seur@active.ident)),
        cols = color.FB) + labs(title = "FB Max_ID"), 
DimPlot(FB.seur.subset, order = c("Doublet",rev(levels(FB.seur@active.ident)),"Negative"), group.by = 'hash.ID', label = F,
        cols = c("lightgrey",color.FB,"#FF6C91")) + 
  labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)
## 
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
## 
##     pattern

stat

FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
                                            levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38"))

VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.015)

table(FB.seur@meta.data$hash.ID.sort)
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##     8385     3303     1628     1660     1769     1524     1562     1714 
##    CKO.3    CKO.4 
##     1795     1418
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,12800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+875,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

GEX

mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 200,
                               project = "CR7d_LI")
GEX.seur
## An object of class Seurat 
## 20978 features across 24758 samples within 1 assay 
## Active assay: RNA (20978 features, 0 variable features)

filtering

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)

RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)

# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc

add FB info

GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##     8385     3303     1628     1660     1769     1524     1562     1714 
##    CKO.3    CKO.4 
##     1795     1418
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative" )
GEX.seur
## An object of class Seurat 
## 20978 features across 13070 samples within 1 assay 
## Active assay: RNA (20978 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

colon neuron, there’s a strange low enrichment distribution under nFeature 500

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

filtering

GEX.seur <- subset(GEX.seur, subset = percent.mt < 5 & nFeature_RNA > 200 & nFeature_RNA < 1800 & nCount_RNA < 3600)
GEX.seur
## An object of class Seurat 
## 20978 features across 13055 samples within 1 assay 
## Active assay: RNA (20978 features, 0 variable features)

filtered obj

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,10800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+675,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,2800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+175,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))

GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
## Warning: The following features are not present in the object: Cdca7, Uhrf1,
## Chaf1b, E2f8, not searching for symbol synonyms
## Warning: The following features are not present in the object: Cdk1, Ube2c,
## Top2a, Aurkb, Bub1, Gtse1, Cdc25c, Kif2c, Dlgap5, Hmmr, Nek2, not searching for
## symbol synonyms
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info", 
    ncol = 2, pt.size = 0.1)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

nearly no cycling !

Markers and Clusters

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 200)
##   [1] "Cntnap2"       "Mgat4c"        "Zfp804a"       "Cntn5"        
##   [5] "Nmu"           "Robo2"         "Cpne4"         "Sgcz"         
##   [9] "Gm32647"       "Lingo2"        "Cdh8"          "Kcnb2"        
##  [13] "Klhl1"         "Sst"           "Gm30613"       "4930432L08Rik"
##  [17] "Pcdh9"         "Pcdh11x"       "Dgkg"          "Gm21847"      
##  [21] "Myh11"         "Adgrg6"        "Ntng1"         "Kctd16"       
##  [25] "Csmd1"         "Gm29521"       "Vip"           "Gpr149"       
##  [29] "Brinp3"        "Nrxn3"         "Nrg1"          "Ebf1"         
##  [33] "Grm7"          "Tmeff2"        "Car10"         "Astn2"        
##  [37] "Kirrel3"       "Gm26612"       "Sema3e"        "Asic2"        
##  [41] "Gpc6"          "Mmrn1"         "Galntl6"       "Chsy3"        
##  [45] "Wdr17"         "Ano5"          "Gal"           "Kcnq5"        
##  [49] "Ano2"          "Gpm6a"         "Prkg1"         "Cenpf"        
##  [53] "Unc5d"         "Rbfox1"        "Dgki"          "Rab27b"       
##  [57] "Gm15680"       "Cdh18"         "Gpc5"          "Schip1"       
##  [61] "March1"        "Cdh9"          "Gm30094"       "Pdzrn4"       
##  [65] "Plxna4"        "Itgb6"         "Gm12296"       "Gm15261"      
##  [69] "Chrm3"         "M1ap"          "B230323A14Rik" "Tac1"         
##  [73] "Gm47456"       "Tafa1"         "Plcl1"         "Pbx3"         
##  [77] "Kcnh7"         "Cpa6"          "Grik1"         "Lrp1b"        
##  [81] "Rbpms"         "Spock3"        "1700111E14Rik" "L3mbtl4"      
##  [85] "Trps1"         "A330008L17Rik" "Xirp2"         "Ptprt"        
##  [89] "Tafa2"         "Clstn2"        "D030068K23Rik" "Fut9"         
##  [93] "Grin3a"        "Ntrk3"         "Pcdh10"        "Opcml"        
##  [97] "Zfp804b"       "mt-Co3"        "Col25a1"       "Dcc"          
## [101] "Ccbe1"         "Grm8"          "Nxph2"         "Pde7b"        
## [105] "Nxph1"         "Zfhx3"         "Cacna2d3"      "Grp"          
## [109] "Pde4d"         "Cdh6"          "4930509J09Rik" "Reln"         
## [113] "mt-Atp6"       "Rfx6"          "Penk"          "St8sia2"      
## [117] "Cdh19"         "Gna14"         "Shisa6"        "Adgrl4"       
## [121] "Scnn1a"        "Gm38405"       "9530059O14Rik" "Gm16685"      
## [125] "Fstl4"         "Myl1"          "Kctd8"         "Gm28153"      
## [129] "Unc13c"        "Unc5c"         "Kcnd2"         "Vgll3"        
## [133] "Alk"           "Actg2"         "AC150683.1"    "1700034P13Rik"
## [137] "Inpp4b"        "Calcb"         "Cysltr2"       "Gm20757"      
## [141] "Robo1"         "Trhde"         "Erbb4"         "Smtn"         
## [145] "Adarb2"        "Dgkb"          "Piezo2"        "Kif26b"       
## [149] "6330411D24Rik" "Gm49127"       "Prox1"         "Cmah"         
## [153] "Sema5a"        "Dock10"        "Abca9"         "Skap1"        
## [157] "Hpse2"         "Gm11339"       "Casp8"         "Dlc1"         
## [161] "Tenm4"         "Trp53cor1"     "Avil"          "Gm20754"      
## [165] "Cfh"           "Efr3a"         "Gm29282"       "Gm13376"      
## [169] "Bloc1s6os"     "Rbm46"         "Muc16"         "Ndufa4l2"     
## [173] "Gm2670"        "Ccdc68"        "Gm34517"       "Chrna9"       
## [177] "Lgals9"        "Galnt18"       "1700063D05Rik" "Gm49678"      
## [181] "Cntn4"         "E130310I04Rik" "Hcn1"          "Gm826"        
## [185] "Bmpr1b"        "Dock1"         "Dlgap1"        "9530026P05Rik"
## [189] "Slc4a4"        "Gm12648"       "Foxp2"         "Nell1"        
## [193] "Luzp2"         "Dab1"          "Gm4128"        "Stac"         
## [197] "5530401A14Rik" "Kcnmb2"        "Gm20713"       "Il1rapl1"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  Nos1, Cmah, Epha5, Cacnb2, Rgs6, Thsd7a, Hmcn1, Slc44a5, Gfra1, Alcam 
##     Thsd7b, Lrrc4c, Dgkb, Syn3, Etv1, Cadps2, Dpp10, Rarb, Stxbp6, Vwa5b1 
##     Creb5, Auts2, Col25a1, Rgs7, Synpo2, Bves, Dach2, Popdc3, Trpc4, Arhgap15 
## Negative:  Tafa1, Rbfox1, Gpc6, Pde4b, Unc5c, Alk, Plxna4, Plcl1, Ptprt, Bnc2 
##     Stxbp5l, Lingo2, Tafa2, Mgat4c, Pbx1, Cpne8, Nell1, Grik1, Pbx3, Cdh8 
##     Pld5, Unc5d, Cntnap2, Kalrn, Kctd8, Cdh18, Zfhx3, Grm7, Chsy3, Ccbe1 
## PC_ 2 
## Positive:  Auts2, Mgat4c, Sgcz, Robo2, Cdh8, Zfp804a, Tmeff2, Dgkg, Kcnb2, Dgkb 
##     Nmu, Ano2, Kcnq5, Schip1, Slc4a4, Cntn5, Astn2, Cpne4, Brinp3, Ntrk3 
##     Gpr149, Myl1, Cdh6, Fgf14, Itgb6, Zfhx3, Pbx3, Clstn2, Ntng1, Plcl1 
## Negative:  Grik1, Bnc2, Galntl6, Tshz2, Pcdh7, Ptprt, Pde4b, Tox, Dlgap2, Cdc14a 
##     Lrp1b, Plcxd3, Alk, Nkain2, Tafa2, St6galnac3, Arhgap24, Tmem132c, Oprk1, Gfra2 
##     Cacna1e, Olfm3, Brinp2, Pld5, Rbfox1, Nxph1, Fhit, Satb1, Colec12, Unc5d 
## PC_ 3 
## Positive:  Dlgap1, Tenm2, Schip1, Pde4d, Kctd16, Dock10, Kctd8, L3mbtl4, Skap1, Unc5d 
##     Egfem1, Stac, Kcnd2, Nhs, Grm7, Dmd, Tac1, Kirrel3, Fut9, Sybu 
##     Sema3e, Pde7b, Plcb1, Arhgef3, Egfr, Col27a1, Asic2, Pde1c, Pbx3, Col25a1 
## Negative:  Cpne4, Tshz2, Cdh8, Ccbe1, Ptprt, Chsy3, Mgat4c, Nmu, Nrxn3, Ano2 
##     Ntrk3, Tcf7l2, B3galt1, Plxna4, Gpr149, Cux2, Dgkg, Nfia, Pcdh9, Itgb6 
##     Slc2a13, Galnt17, Scube1, Clstn2, Calcb, Vgll3, Rbfox1, Ptprd, Cdc14a, Dgki 
## PC_ 4 
## Positive:  Car10, Nxph1, Cntn5, Synpr, Epha5, Lrp1b, Dmd, Unc5d, Csmd3, Prkg1 
##     Zfp804b, Dock10, Pcdh9, Fstl4, Kctd8, Erbb4, Cntnap2, Calcrl, Epha6, Pcdh11x 
##     Olfm3, Pde1c, Nmu, Galntl6, Cdh8, Pcdh10, Cacna2d3, Spock3, Lsamp, Rtl4 
## Negative:  Ntng1, Klhl1, Tafa2, Kcnh7, Gna14, Nxph2, Zfhx3, Kif26b, Flrt2, Bmpr1b 
##     Pbx3, Ano5, Enox1, Csmd1, Skap1, Dlc1, L3mbtl4, Tmtc2, Trpm3, Galnt18 
##     Tnr, Trps1, Pbx1, Slc44a5, Zbbx, Pcdh7, Tenm4, Sez6l, Serpini1, Pakap 
## PC_ 5 
## Positive:  Ntng1, Kcnd2, Trps1, Cntn5, Klhl1, Sgcz, Csmd3, Ebf1, Csmd1, Nkain2 
##     Cdh18, Gna14, Pcdh7, Kif26b, Spock3, Gng2, Rbfox1, Tnr, Schip1, Nxph2 
##     Bmpr1b, Kcnq5, Camk4, Cacna1e, Trhde, Slit2, Spock1, Alk, Nrg1, Cpne8 
## Negative:  Sema3e, Pde7b, Kctd16, Fut9, Egfr, Lingo2, Fras1, Prkg1, Grm8, Nfatc1 
##     L3mbtl4, P3h2, Mgll, Shisa6, Camk1d, Stac, Eepd1, Tac1, Col27a1, Lmo7 
##     Met, Hcn1, Ntn1, Lamc3, Egfros, Dock1, Fam189a1, Pbx1, Snx7, Grm7
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene)))
## [1] 1112
setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene))[1:300]
##   [1] "Cntnap2"   "Mgat4c"    "Zfp804a"   "Cntn5"     "Nmu"       "Robo2"    
##   [7] "Cpne4"     "Sgcz"      "Lingo2"    "Cdh8"      "Kcnb2"     "Klhl1"    
##  [13] "Sst"       "Pcdh9"     "Pcdh11x"   "Dgkg"      "Myh11"     "Adgrg6"   
##  [19] "Ntng1"     "Kctd16"    "Csmd1"     "Vip"       "Gpr149"    "Brinp3"   
##  [25] "Nrxn3"     "Nrg1"      "Ebf1"      "Grm7"      "Tmeff2"    "Car10"    
##  [31] "Astn2"     "Kirrel3"   "Sema3e"    "Asic2"     "Gpc6"      "Mmrn1"    
##  [37] "Galntl6"   "Chsy3"     "Wdr17"     "Ano5"      "Gal"       "Kcnq5"    
##  [43] "Ano2"      "Gpm6a"     "Prkg1"     "Unc5d"     "Rbfox1"    "Dgki"     
##  [49] "Rab27b"    "Cdh18"     "Gpc5"      "Schip1"    "March1"    "Cdh9"     
##  [55] "Pdzrn4"    "Plxna4"    "Itgb6"     "Chrm3"     "M1ap"      "Tac1"     
##  [61] "Tafa1"     "Plcl1"     "Pbx3"      "Kcnh7"     "Cpa6"      "Grik1"    
##  [67] "Lrp1b"     "Rbpms"     "Spock3"    "L3mbtl4"   "Trps1"     "Xirp2"    
##  [73] "Ptprt"     "Tafa2"     "Clstn2"    "Fut9"      "Grin3a"    "Ntrk3"    
##  [79] "Pcdh10"    "Opcml"     "Zfp804b"   "Col25a1"   "Dcc"       "Ccbe1"    
##  [85] "Grm8"      "Nxph2"     "Pde7b"     "Nxph1"     "Zfhx3"     "Cacna2d3" 
##  [91] "Grp"       "Pde4d"     "Cdh6"      "Reln"      "Rfx6"      "Penk"     
##  [97] "St8sia2"   "Cdh19"     "Gna14"     "Shisa6"    "Adgrl4"    "Scnn1a"   
## [103] "Fstl4"     "Myl1"      "Kctd8"     "Unc13c"    "Unc5c"     "Kcnd2"    
## [109] "Vgll3"     "Alk"       "Actg2"     "Inpp4b"    "Calcb"     "Cysltr2"  
## [115] "Robo1"     "Trhde"     "Erbb4"     "Smtn"      "Adarb2"    "Dgkb"     
## [121] "Piezo2"    "Kif26b"    "Prox1"     "Cmah"      "Sema5a"    "Dock10"   
## [127] "Abca9"     "Skap1"     "Hpse2"     "Casp8"     "Dlc1"      "Tenm4"    
## [133] "Trp53cor1" "Avil"      "Cfh"       "Efr3a"     "Bloc1s6os" "Rbm46"    
## [139] "Muc16"     "Ndufa4l2"  "Ccdc68"    "Chrna9"    "Lgals9"    "Galnt18"  
## [145] "Cntn4"     "Hcn1"      "Bmpr1b"    "Dock1"     "Dlgap1"    "Slc4a4"   
## [151] "Foxp2"     "Nell1"     "Luzp2"     "Dab1"      "Stac"      "Kcnmb2"   
## [157] "Il1rapl1"  "Rmst"      "Csmd3"     "Pde1a"     "Etl4"      "Pde4b"    
## [163] "Tenm2"     "Mast4"     "Cux2"      "B3galt1"   "Serpini1"  "Serpinb8" 
## [169] "Maf"       "Prdm6"     "Cadm2"     "Tacr1"     "Rarb"      "Egfem1"   
## [175] "Ssc4d"     "Rtl4"      "Grm1"      "Tnr"       "Clca3a2"   "Fgf14"    
## [181] "Eya1"      "Itih5"     "Mecom"     "Bank1"     "Cyp2j13"   "Smpdl3b"  
## [187] "Fbxw16"    "Gabra1"    "Kcnh3"     "Meltf"     "Cd86"      "Sorcs1"   
## [193] "Pcdh7"     "Pgm5"      "Fhit"      "Nos1"      "Egflam"    "Acss1"    
## [199] "Thsd4"     "Olfr889"   "Cdh12"     "Dcn"       "Hpgd"      "Prune2"   
## [205] "Egfr"      "Galr1"     "Syt10"     "Moxd1"     "Rgs6"      "Npas3"    
## [211] "Dach1"     "Cerkl"     "Ano1"      "Thsd7a"    "Met"       "Ghr"      
## [217] "Adam33"    "Eepd1"     "Adamts9"   "Mgarp"     "Exoc3l2"   "Nlrp3"    
## [223] "Rasgrf1"   "Camk1d"    "Cntn3"     "Dpp10"     "Ntm"       "Ptprk"    
## [229] "Htr2c"     "Ntrk2"     "Pdyn"      "Olfm3"     "Serpinb5"  "Serpini2" 
## [235] "Gbp2"      "Gimap6"    "Bcat1"     "Lrrc37a"   "Mei1"      "Anxa1"    
## [241] "Pik3ap1"   "Flrt2"     "Slit3"     "P3h2"      "Synpr"     "Arhgap6"  
## [247] "Adam2"     "Nog"       "Platr7"    "Ephb1"     "Egfros"    "Rerg"     
## [253] "Gng2"      "Hmcn1"     "Lama2"     "Olfr78"    "Ror1"      "Fbxl7"    
## [259] "Hs3st4"    "Cntnap5b"  "Calb1"     "Satb2"     "Crispld1"  "Kcnn2"    
## [265] "Pak7"      "Slc35f4"   "Stab2"     "Bnc2"      "Tcf7l2"    "C79798"   
## [271] "Cftr"      "Thsd7b"    "Slc2a13"   "Abca8a"    "Arhgap28"  "Ttc29"    
## [277] "Fendrr"    "Lrrc18"    "Col22a1"   "Ptcra"     "Mep1b"     "Adamts13" 
## [283] "Ier5l"     "Pdgfra"    "Mir9-3hg"  "Smim31"    "Spag5"     "Cfap53"   
## [289] "Col12a1"   "Vcan"      "Tmem132c"  "Palld"     "Nfatc1"    "Gabrb1"   
## [295] "Folh1"     "Rasgef1b"  "Sorcs3"    "Galnt5"    "Pakap"     "Tmem132d"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:25
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 13055
## Number of edges: 544702
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7949
## Number of communities: 27
## Elapsed time: 1 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 145)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 21:57:27 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:57:27 Read 13055 rows and found 25 numeric columns
## 21:57:27 Using Annoy for neighbor search, n_neighbors = 20
## 21:57:27 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:57:28 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpgLNYVE\file607824eb3f72
## 21:57:28 Searching Annoy index using 1 thread, search_k = 2000
## 21:57:31 Annoy recall = 100%
## 21:57:32 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 21:57:33 Initializing from normalized Laplacian + noise (using irlba)
## 21:57:33 Commencing optimization for 200 epochs, with 385114 positive edges
## 21:57:46 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
        ncol = 4, cols = color.FB)

GEX.seur$cnt <- factor(gsub(".1$|.2$|.3$|.4$","",as.character(GEX.seur$FB.info)),
                       levels = c("CTL",
                                  "CKO"))
color.cnt <- color.FB[c(1,5)]
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

DoubletFinder

library(DoubletFinder)
for(i in 1:length(sweep.res.list)){
  if(length(sweep.res.list[[i]]$pANN[is.nan(sweep.res.list[[i]]$pANN)]) !=0){
    if(i!=1){
      sweep.res.list[[i]] <- sweep.res.list[[i-1]]
    }else(
      sweep.res.list[[i]] <- sweep.res.list[[i+1]]
    )
  }
}
sweep.stats <- summarizeSweep(sweep.res.list, GT=FALSE)
bcmvn <- find.pK(sweep.stats)

## NULL
pk_v <- as.numeric(as.character(bcmvn$pK))
pk_good <- pk_v[bcmvn$BCmetric==max(bcmvn$BCmetric)]
# specify expected doublet number     
nExp_poi <- round(0.05*length(colnames(GEX.seur)))

GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good, 
                             nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 4352 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.05"
# specify expected doublet number     
nExp_poi <- round(0.1*length(colnames(GEX.seur)))

GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good, 
                             nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 4352 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.1"
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.05") +
  DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")

check

markers.new.ss <- list(EMN=c("Chat","Bnc2","Tox","Ptprt",
                       "Gfra2","Oprk1","Adamtsl1", 
                       "Fbxw15","Fbxw24","Chrna7",
                       "Satb1","Itga6","Cntnap5b",
                       "Pgm5","Chgb",
                       "Nxph1",
                       "Gabrb1","Glp2r","Nebl",
                       "Lrrc7",
                       "Ryr3","Eda",
                       "Hgf","Lama2","Efnb2",
                       "Tac1",
                       "Kctd8",
                       "Ptn",
                       "Ntrk2","Penk","Itgb8",
                       "Fut9","Nfatc1","Egfr","Pdia5",
                       "Ahr","Mgll",
                       "Aff3",
                       "Chrm3"
                       ),
                 IMN=c("Nos1","Kcnab1",
                       "Gfra1","Etv1",
                       "Man1a","Airn",
                       "Adcy2",
                       "Col25a1",
                       "Cmah","Creb5","Vip","Pde1a",
                       "Ebf1","Gpc5","Mid1","Igfbp5",
                       "Ppara",
                       "Pcdh11x","Adcy8","Grp"
                       ),
                 IN=c("Npas3","Synpr","St18","Gal",
                      "Nova1",
                      "Cdh10","Kcnk13",
                      "Neurod6","Moxd1","Sctr",
                      "Piezo1","Vipr2","Adamts9","Sst","Kcnn2"
                      ),
                 IPAN=c("Calb2","Adcy1","Calcb","Nmu","Adgrg6","Pcdh10",
                        "Ngfr","Galr1","Il7","Aff2",
                        "Gpr149","Itgb6","Met","Itgbl1",
                        
                        "Cdh6","Cdh8",
                        "Clstn2","Ano2","Ntrk3",
                        "Cpne4","Vwc2l","Cdh9",
                        "Car10","Dcc",
                        "Scgn",
                        "Vcan","Cck","Piezo2","Kcnh7",
                        "Rerg","Bmpr1b","Skap1","Ntng1",
                        "Tafa2","Nxph2"),
                 check0407 = c("Gcg","Glp2r","Insl5","Nts",
               "Ntsr1","Ntsr2","Pyy",#"Sst",
               "Gip","Sct","Sctr","Ghrl"),
               others = c("Sox10","Plp1","Gfap","Rxrg",
                          "Acta2","Myh11","Pdgfra","Bmp5"))
check0407 = c("Gcg","Glp2r","Insl5","Nts",
               "Ntsr1","Ntsr2","Pyy","Sst",
               "Gip","Sct","Sctr","Ghrl")
check.markers.ss <- as.vector(unlist(markers.new.ss))
check.markers.ss
##   [1] "Chat"     "Bnc2"     "Tox"      "Ptprt"    "Gfra2"    "Oprk1"   
##   [7] "Adamtsl1" "Fbxw15"   "Fbxw24"   "Chrna7"   "Satb1"    "Itga6"   
##  [13] "Cntnap5b" "Pgm5"     "Chgb"     "Nxph1"    "Gabrb1"   "Glp2r"   
##  [19] "Nebl"     "Lrrc7"    "Ryr3"     "Eda"      "Hgf"      "Lama2"   
##  [25] "Efnb2"    "Tac1"     "Kctd8"    "Ptn"      "Ntrk2"    "Penk"    
##  [31] "Itgb8"    "Fut9"     "Nfatc1"   "Egfr"     "Pdia5"    "Ahr"     
##  [37] "Mgll"     "Aff3"     "Chrm3"    "Nos1"     "Kcnab1"   "Gfra1"   
##  [43] "Etv1"     "Man1a"    "Airn"     "Adcy2"    "Col25a1"  "Cmah"    
##  [49] "Creb5"    "Vip"      "Pde1a"    "Ebf1"     "Gpc5"     "Mid1"    
##  [55] "Igfbp5"   "Ppara"    "Pcdh11x"  "Adcy8"    "Grp"      "Npas3"   
##  [61] "Synpr"    "St18"     "Gal"      "Nova1"    "Cdh10"    "Kcnk13"  
##  [67] "Neurod6"  "Moxd1"    "Sctr"     "Piezo1"   "Vipr2"    "Adamts9" 
##  [73] "Sst"      "Kcnn2"    "Calb2"    "Adcy1"    "Calcb"    "Nmu"     
##  [79] "Adgrg6"   "Pcdh10"   "Ngfr"     "Galr1"    "Il7"      "Aff2"    
##  [85] "Gpr149"   "Itgb6"    "Met"      "Itgbl1"   "Cdh6"     "Cdh8"    
##  [91] "Clstn2"   "Ano2"     "Ntrk3"    "Cpne4"    "Vwc2l"    "Cdh9"    
##  [97] "Car10"    "Dcc"      "Scgn"     "Vcan"     "Cck"      "Piezo2"  
## [103] "Kcnh7"    "Rerg"     "Bmpr1b"   "Skap1"    "Ntng1"    "Tafa2"   
## [109] "Nxph2"    "Gcg"      "Glp2r"    "Insl5"    "Nts"      "Ntsr1"   
## [115] "Ntsr2"    "Pyy"      "Gip"      "Sct"      "Sctr"     "Ghrl"    
## [121] "Sox10"    "Plp1"     "Gfap"     "Rxrg"     "Acta2"    "Myh11"   
## [127] "Pdgfra"   "Bmp5"
check.markers.sss <- check.markers.ss[check.markers.ss %in% rownames(GEX.seur)]
pm.All_raw <- DotPlot(GEX.seur, features = rev(unique(rev(check.markers.sss))), group.by = "seurat_clusters",assay = "RNA",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="check previous markers")
pm.All_raw

FeaturePlot(GEX.seur, features = c(#"Chat"
                                   "Bnc2","Fut9","Nos1","Vip",
                                   "Gal","Moxd1","Sst","Calcb",
                                   "Nmu","Cdh9","Piezo2","Nxph2"),
            ncol = 4)

VlnPlot(GEX.seur, features = c("Cdh8","Cdh9"), group.by = "seurat_clusters",ncol = 1)

sort_clusters

GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(6,16,10,14,  3, 5, 12, 17,      # EMN
                                            1,8,0,11, 15,4,20, 18, 7,       # IMN
                                            22, 23, 2,21,           # IN
                                            9, 25, 19, 13,     # IPAN
                                            24,   # Mix
                                            26   # Glia/Fib/SMC (also as contaminated mix)
                                            ))
pm.All_sort <- DotPlot(GEX.seur, features = rev(unique(rev(check.markers.sss))), group.by = "sort_clusters",assay = "RNA",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="check previous markers")
pm.All_sort

preAnno

GEX.seur$preAnno <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno[GEX.seur$preAnno %in% c(6,16,10,14)] <- "EMN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(3)] <- "EMN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(5)] <- "EMN3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(12)] <- "EMN4"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(17)] <- "EMN5"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(1,8,0,11)] <- "IMN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(15,4,20)] <- "IMN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(18)] <- "IMN3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(7)] <- "IMN4"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(22)] <- "IN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(23)] <- "IN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(2,21)] <- "IN3"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(9)] <- "IPAN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(25)] <- "IPAN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(19)] <- "IPAN3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(13)] <- "IPAN4"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(24,26)] <- "Mix"


GEX.seur$preAnno <- factor(GEX.seur$preAnno,
                           levels = c(paste0("EMN",1:5),
                                      paste0("IMN",1:4),
                                      paste0("IN",1:3),
                                      paste0("IPAN",1:4),
                                      "Mix"))
color.pre <-c("#678BB1","#8AB6CE","#3975C1","#669FDF","#4CC1BD",
              "#BF7E6B","#D46B35","#F19258","#FF8080",
              "#BDAE8D","#BD66C4","#C03778",
              "#97BA59","#DFAB16","#2BA956","#9FE727",
              "red")
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno", cols = color.pre) +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

pm.All_pre <- DotPlot(GEX.seur, features = rev(unique(rev(check.markers.sss))), group.by = "preAnno",assay = "RNA",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="check previous markers")
pm.All_pre

#saveRDS(GEX.seur, "./GEX0925.seur.preAnno.rds")

check pure

GEX.pure <- subset(GEX.seur,subset = preAnno != "Mix" & DoubletFinder0.1 == "Singlet")
GEX.pure
## An object of class Seurat 
## 20978 features across 11666 samples within 1 assay 
## Active assay: RNA (20978 features, 1112 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.pure, reduction = "umap", label = T, group.by = "preAnno", cols = color.pre) +
  DimPlot(GEX.pure, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

pm.All_pre <- DotPlot(GEX.pure, features = rev(unique(rev(check.markers.sss))), group.by = "preAnno",assay = "RNA",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="check previous markers")
pm.All_pre

composition

GEX.pure$rep <- paste0("rep",
                        gsub("CTL|CKO","",as.character(GEX.pure$FB.info)))
GEX.pure$FB.info <- factor(as.character(GEX.pure$FB.info),
                           levels = c(paste0("CTL.",1:4),
                                      paste0("CKO.",1:4)))

GEX.pure$preAnno <- factor(as.character(GEX.pure$preAnno),
                           levels = levels(GEX.seur$preAnno)[1:16])

heatmap

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.pure$FB.info,
      clusters=GEX.pure$preAnno),
                   main = "Cell Count",
                   gaps_row = c(4),
      gaps_col = c(5,9,12),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.pure$FB.info,
                                clusters=GEX.pure$preAnno)),
                   main = "Cell Ratio",
                   gaps_row = c(4),
      gaps_col = c(5,9,12),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

barplot

stat_preAnno <- GEX.pure@meta.data[,c("cnt","rep","preAnno")]

stat_preAnno.s <- stat_preAnno %>%
  group_by(cnt,rep,preAnno) %>%
  summarise(count=n()) %>%
  mutate(prop= count/sum(count)) %>%
  ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.preAnno <- stat_preAnno.s %>%
  ggplot(aes(x = preAnno, y = 100*prop, color=cnt)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  theme_classic(base_size = 15) +
  scale_color_manual(values = c("skyblue","pink"), name="") +
  labs(title="Cell Composition of Colon data, preAnno: CR7d CTL vs CKO", y = "Proportion") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
  
  geom_point(aes(x=preAnno, y = 100*prop, color= cnt),
             position = position_dodge(0.75),
             shape=16,alpha=0.75,size=2.15,
             stroke=0.15, show.legend=F)

pcom.preAnno

sig.test

glm.nb - anova.LRT

Sort.N <- c("CTL","CKO")

glm.nb_anova.preAnno <- list()

for(x1 in 1:2){
  for(x2 in 1:2){
    if(x2>x1){
      
      stat_preAnno.s_N <- stat_preAnno.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
      
      total.N <- stat_preAnno.s_N %>%
        group_by(cnt, rep) %>%
        summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
      
      rownames(total.N) <- paste0(as.character(total.N$cnt),
                                  "_",
                                  as.character(total.N$rep))
      
      stat_preAnno.s_N$total <- total.N[paste0(stat_preAnno.s_N$cnt,
                                            "_",
                                            stat_preAnno.s_N$rep),"total"]
      
      glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
      
      for(AA in levels(stat_preAnno.s_N$preAnno)){
        glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
          aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
          aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
          aaa.anova <- anova(aaa.glmnb, test = "LRT")
          aaa.anova$'Pr(>Chi)'[2]
        }, error=function(e){
        
          tryCatch({
            aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
            aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
            aaa.anova <- anova(aaa.glmnb, test = "LRT")
            aaa.anova$'Pr(>Chi)'[2]
          }, error=function(e){
            
            tryCatch({
              aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
              aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
              aaa.anova <- anova(aaa.glmnb, test = "LRT")
              aaa.anova$'Pr(>Chi)'[2]
            }, error = function(e){
              NA
            })     
          })
        })
      }
      glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]]) 
    }
  }
}
glm.nb_anova.preAnno_df <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno)))
rownames(glm.nb_anova.preAnno_df) <- names(glm.nb_anova.preAnno)
colnames(glm.nb_anova.preAnno_df) <- gsub("X","C",colnames(glm.nb_anova.preAnno_df))
glm.nb_anova.preAnno_df
##               EMN1      EMN2      EMN3      EMN4      EMN5      IMN1      IMN2
## CTLvsCKO 0.0194322 0.5587671 0.3886185 0.3822187 0.4662382 0.4837981 0.5977187
##               IMN3      IMN4        IN1       IN2       IN3    IPAN1
## CTLvsCKO 0.3725829 0.5909909 0.04770969 0.4219428 0.3814654 0.465705
##                IPAN2     IPAN3     IPAN4
## CTLvsCKO 0.002575981 0.6452318 0.6279909
round(glm.nb_anova.preAnno_df,4)
##            EMN1   EMN2   EMN3   EMN4   EMN5   IMN1   IMN2   IMN3  IMN4    IN1
## CTLvsCKO 0.0194 0.5588 0.3886 0.3822 0.4662 0.4838 0.5977 0.3726 0.591 0.0477
##             IN2    IN3  IPAN1  IPAN2  IPAN3 IPAN4
## CTLvsCKO 0.4219 0.3815 0.4657 0.0026 0.6452 0.628